# clear labels for versions
versions <- tibble(
version = c("v1", "v2", "v3", "v4", "v5", "v6", "v7"),
vlabel = factor(
c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value",
"$P(S_1|untested)$ Centered at Emp. Value",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values",
"$\\beta$ Centered at Emp. Value (Spline Smoothed)",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values ($\\beta$ Spline Smoothed)",
"$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
),
levels = c(
"$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value",
"$P(S_1|untested)$ Centered at Emp. Value",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values",
"$\\beta$ Centered at Emp. Value (Spline Smoothed)",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values ($\\beta$ Spline Smoothed)",
"$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
) )
)
# read state-level results
targets_dir <- here("_targets")
state_res <- tar_read(state_v1,
store =targets_dir) %>%
bind_rows(
tar_read(state_v2,
store =targets_dir)
) %>%
bind_rows(
tar_read(state_v3,
store =targets_dir)
) %>%
bind_rows(
tar_read(state_v4,
store =targets_dir)
)%>%
bind_rows(
tar_read(state_v5,
store =targets_dir)
)%>%
bind_rows(
tar_read(state_v6,
store =targets_dir)
) %>%
bind_rows(
tar_read(state_v7,
store =targets_dir)
)
covidestim <- tar_read(covidestim_biweekly_state,
store =targets_dir)
dates <- readRDS(here("data/data_raw/date_to_biweek.RDS"))
codes <- read_csv(here('data/demographic/statecodes.csv'))
state_res <- state_res %>%
rename(state=fips) %>%
left_join(versions) %>%
mutate(state=toupper(state)) %>%
left_join(covidestim, relationship= "many-to-many") %>%
left_join(dates, relationship = "many-to-many") %>%
rename(name=state_name) %>%
group_by(biweek) %>%
mutate(mindate=min(date),
maxdate=max(date)) %>%
ungroup()
if(filter_versions){
state_res <- state_res %>%
filter(version %in% c("v3", "v5", "v6", "v7"))
}
theme_c <- function(...){
# font <- "Helvetica" #assign font family up front
theme_bw() %+replace% #replace elements we want to change
theme(
#text elements
plot.title = element_text( #title
size = 16, #set font size
face = 'bold', #bold typeface
hjust = .5,
vjust = 3),
plot.subtitle = element_text( #subtitle
size = 12,
hjust = .5,
face = 'italic',
vjust = 3), #font size
axis.title = element_text( #axis titles
size = 12), #font size
axis.text = element_text( #axis text
size = 9),
legend.text = element_text(size = 12),
# t, r, b, l
plot.margin = unit(c(1,.5,.5,.5), "cm"),
legend.position = "right",
strip.text.x = element_text(size = 11, face = "bold", color="white"),
strip.text.y = element_text(size = 11, face = "bold", color="white",angle=270),
strip.background = element_rect(fill = "#3E3D3D")
) %+replace%
theme(...)
}
Summarizing Concordance with Covidestim
# corrected %>%
# mutate(in_interval = case_when(
# exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
# exp_cases_lb > infections ~ "Below Interval",
# exp_cases_ub < infections ~ "Above Interval"
# )) %>%
# filter(!is.na(in_interval)) %>%
# group_by(in_interval, state, vlabel) %>%
# summarize(n_per_county=n()) %>%
# group_by(vlabel, in_interval) %>%
# summarize(n_per_version = sum(n_per_county)) %>%
# group_by(vlabel) %>%
# mutate(total = sum(n_per_version)) %>%
# ungroup() %>%
# mutate(prop=n_per_version/total)
by_version <- state_res %>%
# filter(biweek >=6) %>%
select(-date) %>%
distinct() %>%
mutate(in_interval = case_when(
exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
exp_cases_lb > infections ~ "Below Interval",
exp_cases_ub < infections ~ "Above Interval"
)) %>%
filter(!is.na(in_interval)) %>%
group_by(in_interval, vlabel) %>%
summarize(n=n()) %>%
group_by(vlabel) %>%
mutate(total = sum(n)) %>%
mutate(prop=n/total)
labels <- by_version %>%
arrange(prop) %>%
pull(vlabel) %>%
as.character() %>%
unique()
by_version %>%
mutate(in_interval = factor(in_interval,
levels = c("Above Interval",
"In Interval",
"Below Interval"
))) %>%
ggplot(aes(x= fct_reorder(vlabel,prop,max),
fill = in_interval, y =prop)) +
geom_bar(stat="identity",position="stack") +
theme_c() +
coord_flip()+ scale_fill_manual(values=c("In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79",
"Below Interval"="#7997D2"),
breaks = c("Below Interval",
"In Interval",
"Above Interval")) +
labs(x="",
y= "Proportion",
fill = "Covidestim Median:",
title = "Proportion of Simulation Intervals Where Covidestim Median\nWas Within, Above, or Below the Median") +
theme_c() +
theme_c(legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt'),
axis.text.y = element_text(size = 17, hjust=1)) +
theme(plot.title = element_text(size = 20),
plot.subtitle = element_text(size=18,
face='italic',
margin=margin(4,0,4,0)),
axis.text.x=element_text(size=12),
axis.title = element_text(size=14)) +
scale_x_discrete(labels = (TeX(labels)))

ggsave(here('figures/concordance-covidestim-state.pdf'))
by_version %>%
group_by(vlabel) %>%
filter(in_interval == "In Interval") %>%
mutate(n_interval = n) %>%
ggplot(aes(y=fct_reorder(vlabel,prop), x=prop)) +
geom_text(aes(label=round(prop,3), x= prop+.03),
angle=0) +
geom_bar(stat="identity", fill="#596281") +
scale_y_discrete(breaks= unique(by_version$vlabel),
labels=function(x)TeX(x)) +
scale_x_continuous(expand=c(0,0),
limits=c(0,.9), n.breaks=7) +
theme_c(axis.text.y = element_text(size = 13, hjust=1),
axis.title.x = element_text(size=14),
axis.text.x=element_text(size=10)) +
labs(y="",
x="Proportion Containing the Covidestim Median",
title="Proportion Containing the Covidestim Median",
subtitle = "By Implementation")

Simulation Intervals Over Time
all_versions <- state_res %>%
group_by(state) %>%
summarize(state_n=n_distinct(version)) %>%
filter(state_n == max(state_n)) %>%
pull(state)
state_res %>%
filter(state %in% sample(unique(all_versions),5)) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub),
alpha = .8,
show.legend=FALSE,
fill="#596281") +
geom_linerange(aes(xmin = mindate,
xmax=maxdate,
y = positive,
color = 'obs'),
size=.5) +
facet_grid(name~vlabel,
labeller=labeller(vlabel =as_labeller(
TeX, default=label_parsed)),
scales="free_y") +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
legend.position = "top",
) +
scale_fill_manual(values = pal) +
labs(x="",
y = "Estimated Infections",
title = "Simulation Intervals Over Time",
subtitle = "By State") +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))
Compare Versions
subset <- state_res %>%
filter(vlabel == "$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$")
pal <- c("#247C90", "red")
names(pal) <- c(as.character(unique(subset$vlabel)), "Covidestim")
state_res %>%
filter(state %in% sample(unique(all_versions),5)) %>%
filter(version %in% c("v2", "v5", "v6", "v7")) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill=vlabel),
alpha = .8,
show.legend=FALSE #,
# fill="#596281"
) +
geom_linerange(aes(xmin = mindate,
xmax=maxdate,
y = positive,
color = 'obs'),
size=.5) +
facet_grid(name~vlabel,
labeller=labeller(vlabel =as_labeller(
TeX, default=label_parsed)),
scales="free_y") +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(
legend.position = "top",
) +
scale_fill_manual(values = pal) +
labs(x="",
y = "Estimated Infections",
title = "Simulation Intervals Over Time",
subtitle = "By State") +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))

Compare States
subset %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .8,
show.legend=FALSE) +
geom_ribbon(aes(x = date,
fill = 'Covidestim',
ymin = infections.lo,
ymax = infections.hi),
alpha = .6) +
facet_wrap(~name, ncol=4, scales = "free",
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3.5 months",
date_labels = "%b %Y") +
theme_c(axis.text.x=element_text(size=5),
axis.text.y = element_text(size=4.5),
legend.position = "top",
legend.text=element_text(size=12),
strip.text.x=element_text(size=8,
face='plain',
color='white')) +
scale_fill_manual(values =pal,
labels = TeX(names(pal)),
name=''
#,
# , breaks="Covidestim"
) +
labs(y = "Estimated Infections",
title = paste0("Estimated Infections by State")) +
guides(color = guide_legend(override.aes = list(linewidth =2),
nrow=2,
byrow=TRUE))

ggsave(here('figures/intervals-and-covidestim-all-states.pdf'))
Ratio of Estimated to Observed
subset %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb/positive,
ymax = exp_cases_ub/positive,
fill = vlabel),
alpha = .9,
show.legend=FALSE,
fill= "#247C90") +
facet_wrap(~state, ncol=5,
labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "3 months",
date_labels = "%b %Y") +
theme_c(axis.text.x=element_text(size=6),
axis.text.y = element_text(size=8),
legend.position = "top") +
scale_fill_manual(values =pal,
labels = TeX(names(pal)),
name='',
breaks="Covidestim") +
labs(y = "Ratio of Estimated Infections to Observed",
x="",
title = paste0("Ratio of Estimated Infections to Observed by State")) +
guides(color = guide_legend(override.aes = list(linewidth =2),
nrow=2,
byrow=TRUE))

ggsave(here('figures/ratio-estimated-to-observed-all-states.pdf'))
Comparing States at Specific Two-week Intervals
During Alpha Wave
plt1 <- subset %>%
# left_join(codes) %>%
filter(biweek == 7) %>%
ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
xmin = exp_cases_lb/positive,
xmax = exp_cases_ub/positive)) +
geom_errorbar() +
# geom_point(aes(y=state_name,x=exp_cases_median/positive),
# color = "darkred", size=.8, alpha=.5) +
theme_c() +
theme(axis.text.y=element_text(size=10),
axis.text.x = element_text(size=11),
axis.title = element_text(size = 17),
plot.subtitle = element_text(size=14),
plot.title = element_text(size=17, face="plain")) +
labs(y="", x="Ratio of Estimated Infections\nto Observed",
title ="During Two-week Period\nin the Alpha Wave",
subtitle = "March 26, 2021 through April 8, 2021") +
xlim(0,65)
plt1

During Delta Wave
subset %>%
mutate(ratio = exp_cases_median/positive) %>%
group_by(biweek) %>%
summarize(mean = mean(ratio),
mindate=min(date),
maxdate=max(date)) %>%
arrange(desc(mean))
plt2 <- subset %>%
# left_join(codes) %>%
filter(biweek == 13) %>%
ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
xmin = exp_cases_lb/positive,
xmax = exp_cases_ub/positive)) +
geom_errorbar() +
# geom_point(aes(y=state_name,x=exp_cases_median/positive),
# color = "darkred", size=.8, alpha=.5) +
theme_c() +
theme(axis.text.y=element_text(size=10),
axis.text.x = element_text(size=11),
axis.title = element_text(size = 17),
plot.subtitle = element_text(size=14),
plot.title = element_text(size=17, face="plain")) +
labs(y="", x="Ratio of Estimated Infections\nto Observed",
title ="During Two-week Period\nin the Delta Wave",
subtitle = "June 18, 2021 through July 1, 2021") +
xlim(0,65)
plt2

During Omicron Wave
subset %>%
left_join(codes) %>%
filter(biweek == 27) %>%
pull(date) %>%
range()
subset %>%
left_join(codes) %>%
filter(biweek == 13) %>%
pull(date) %>%
range()
plt3 <- subset %>%
# left_join(codes) %>%
filter(biweek == 27) %>%
ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
xmin = exp_cases_lb/positive,
xmax = exp_cases_ub/positive)) +
geom_errorbar() +
theme_c() +
theme(axis.text.y=element_text(size=10),
axis.text.x = element_text(size=11),
axis.title = element_text(size = 17),
plot.subtitle = element_text(size=14),
plot.title = element_text(size=17, face="plain")) +
labs(y="", x="Ratio of Estimated Infections\nto Observed",
title ="During Two-week Period\nin the Omicron Wave",
subtitle = "December 31, 2021 through January 14, 2022") +
xlim(0,65)
plt3

All Waves Together
plt <- plot_grid(plt1,plt2, plt3, nrow=1, align="none", labels="auto", label_size=19)
title <- ggplot() +
labs(title = "Ratio of Estimated Infections to Observed Infections") +
theme_c(plot.title=element_text(size=25))
plot_grid(title, plt, nrow=2,
rel_heights= c(.05,.95))

ggsave(here('figures/ratio-estimated-to-observed-multiple-waves.pdf'))
subset %>%
# left_join(codes) %>%
filter(biweek %in% c(7, 13, 27)) %>%
select(-date) %>%
distinct() %>%
group_by(state) %>%
mutate(ord = empirical_testpos[which(biweek==7)]) %>%
mutate(biweek=factor(biweek)) %>%
ggplot(aes(y=fct_reorder(state,ord),
x=empirical_testpos,fill=biweek)) +
geom_bar(stat="identity",position="dodge")+
theme_c() +
theme_c() +
labs(title = "Testing Positivity by State",
y="",
x="Biweekly Testing Positivity",
fill="") +
scale_x_continuous(n.breaks=10)

subset %>%
# left_join(codes) %>%
filter(biweek %in% c(7, 13, 27)) %>%
group_by(biweek) %>%
mutate(daterange= paste(format(min(date), "%B %d %Y"),
"to",
format(max(date), "%B %d %Y")),
daterange=factor(
daterange,
levels=c(
"March 26 2021 to April 08 2021",
"June 18 2021 to July 01 2021",
"December 31 2021 to January 14 2022"))) %>%
select(-date) %>%
distinct() %>%
group_by(state) %>%
mutate(testrate=total/population,
ord = testrate[which(biweek==7)]) %>%
mutate(biweek=factor(biweek)) %>%
ggplot(aes(y=fct_reorder(name,ord,.desc=TRUE),
x=testrate,fill=daterange)) +
geom_bar(stat="identity",position="dodge") +
theme_c() +
labs(title = "Testing Rate by State",
y="",
x="Biweekly Testing Rate",
fill="") +
scale_x_continuous(n.breaks=10)

compare_testrates <- subset %>%
mutate(testrate=total/population) %>%
select(-date) %>%
distinct() %>%
filter(biweek %in% c(7, 13, 27)) %>%
select(biweek,state,testrate) %>%
pivot_wider(names_from=biweek, values_from=testrate, names_prefix="biweek_") %>%
mutate(omicron_over_alpha = biweek_27/biweek_7,
omicron_over_delta = biweek_27/biweek_13) %>%
select(state,contains("omicron")) %>%
pivot_longer(c(omicron_over_alpha,omicron_over_delta)) %>%
group_by(name) %>%
summarize(mean_val = mean(value))
cat("The testing rate during this two-week interval during the omicron wave was", compare_testrates %>% filter(name=="omicron_over_alpha") %>% pull(mean_val), "times the testing rate during this time interval in the alpha wave.")
## The testing rate during this two-week interval during the omicron wave was 2.440368 times the testing rate during this time interval in the alpha wave.
cat("The testing rate during this two-week interval during the omicron wave was", compare_testrates %>% filter(name=="omicron_over_delta") %>% pull(mean_val), "times the testing rate during this time interval in the delta wave.")
## The testing rate during this two-week interval during the omicron wave was 4.930745 times the testing rate during this time interval in the delta wave.
Rank Ratio of Estimated to Observed Over Time
ranks <- subset %>%
mutate(ratio = exp_cases_median/positive,
tested = total/population) %>%
# one observation per biweek per state
select(biweek, date, ratio, name,tested) %>%
group_by(biweek,ratio,name,tested) %>%
summarize(date=min(date)) %>%
# rank for each time interval
group_by(biweek) %>%
mutate(rank = rank(ratio),
rank_tested=rank(-tested))
r <- ranks %>%
mutate(rank_group = case_when(
rank <= 10 ~ "Top 10",
rank > 41 ~ "Bottom 10",
TRUE ~ "Middle" )) %>%
group_by(name, rank_group) %>%
mutate(n=n()) %>%
group_by(name) %>%
mutate(total =n(),
max_group = rank_group[which.max(n)]) %>%
filter( max(n) >24) %>%
filter(max_group != "Middle") %>%
ungroup()
top_10 <- r %>% filter(rank_group=="Top 10") %>%
arrange(desc(n)) %>% pull(name) %>%
unique() %>%paste(collapse=", ")
cat("States that consistently have the lowest ratios:", top_10)
## States that consistently have the lowest ratios: Rhode Island, Massachusetts, District of Columbia, Alaska, New York, Vermont
bottom_10 <- r %>% filter(rank_group=="Bottom 10") %>%
arrange(desc(n)) %>% pull(name) %>%
unique() %>%paste(collapse=", ")
cat("States that consistently have the highest ratios:", bottom_10)
## States that consistently have the highest ratios: Mississippi, South Dakota, Oklahoma, Nebraska, Tennessee
end_labs <- r %>%
ungroup() %>%
filter(date==max(date)) %>%
mutate(date = date + days(10)) %>%
select(name, rank, date) %>%
ungroup()
##############################
# RANK PLOT OVER TIME
##############################
#
# set.seed(999)
#
# pal1 <-viridis_pal(option="viridis", end = .9, begin=.3)(3)
# pal2 <- viridis_pal(option="rocket", end = .9, begin=.3)(2)
# pal3 <- viridis_pal(option="magma", end = .9, begin=.3)(2)
# pal4 <- viridis_pal(option="mako", end = .8, begin=.3)(2)
# pal5 <- viridis_pal(option="cividis", end = .9, begin=.1)(3)
#
#
# rankpal <- c(pal1, pal2, pal3,pal4,pal5)
# indexes <- sample.int(length(rankpal), replace=FALSE)
# rankpal <- rankpal[indexes]
set.seed(123)
rankpal <- c("#b4ddd4", "#7b2c31", "#65d04b",
"#da239b", "#b7d165", "#453fbc",
"#658114", "#af3fe8", "#ffb947",
"#0a60a8", "#f87945", "#16d0ae")
rankpal <- c("#851657", "#be3acd", "#19a71f",
"#824598", "#ed820a", "#BB8E37",
"#974810", "#1f84ec", "#d02023",
"#059dc5", "#f23387", "#16d0ae")
indexes <- sample.int(length(rankpal), replace=FALSE)
rankpal <- rankpal[indexes]
r %>%
# filter(rank_group!="Middle") %>%
ggplot() +
geom_point(aes(x=date,y=rank,
group=name,
color= name),
size=.5) +
geom_line(aes(x=date,y=rank,
group=name,
color= name)) +
# facet_wrap(~max_group) +
ggrepel::geom_text_repel( aes(x= date-days(4),
y = rank,
color=name,
label = name),
end_labs,
min.segment.length=2,
nudge_y=0,
hjust=0,
size=3,
direction="y",
force_pull=9,
box.padding=.15) +
theme_c(legend.position="none",
axis.text.x = element_text(angle =0,size=14)) +
# theme(plot.subtitle =element_text(size=18),
# axis.title.x=element_text(size=16),
# axis.title.y=element_text(size=16)) +
# scale_color_brewer(palette="Dark2") +
scale_color_manual(values=rankpal) +
scale_x_date(date_breaks="2 months", date_labels = "%b %Y",
limits = c(ymd("2021-01-01"),
ymd("2022-04-15"))) +
labs(y = "Rank of Ratio",
title = "Rank of Estimated Infections to Observed Infections Over Time",
subtitle = "For States Ranked in the Top or Bottom 10\nfor More Than 80% of Time Intervals",
x="Date") +
scale_y_reverse(breaks=seq(1,51, by=9))
